** Building the instrument for market size growth across cells of the product space,
** which are now defined at the state level.

* Machine-specific absolute paths; adjust these before running elsewhere.
global db "D:\Dropbox\unequal_gains\main_data"
global Section4 "D:\Dropbox\unequal_gains\QJE revision plan\analysis\section4_data"

* We start from the household groups we built in 4c.
use "$Section4/age_educ_race_children_state_groups", clear

* (1) First, compute the various shocks with fixed effects in household space (same as 4c).
* Every regression and weighted summary below uses this analytic weight.
local aw "log((P+P_initial)/2)"

sum g_n_annual, d
sum g_n_annual [aw=`aw'], d
* Weighted mean of the raw shock (r(mean) of the weighted summary just above);
* it is added back to every residual created in the loop below.
gen mean_g_n_annual=r(mean)

* Each quoted entry is "<suffix> <controls>": g_n_annual is regressed on <controls>
* and g_n_annual_<suffix> = residual + weighted mean of g_n_annual.
*   age_bin without the i. prefix enters linearly; i.<var> enters as fixed effects.
* NOTE(review): the linageducres and linagestateres specs previously used `age',
*   while every other linear-age spec used age_bin. Unless a separate variable
*   named age exists, `age' only resolved to age_bin through variable
*   abbreviation; age_bin is now spelled out everywhere -- confirm no distinct
*   age variable was intended.
foreach spec in ///
    "linageres age_bin" ///
    "ageres i.age_bin" ///
    "ageeducres i.age_bin i.education" ///
    "ageeducstateres i.age_bin i.education i.statefip" ///
    "allres i.age_bin i.education i.statefip i.race i.children" ///
    "educres i.education" ///
    "linageducres age_bin i.education" ///
    "stateres i.statefip" ///
    "raceres i.race" ///
    "childrenres i.children" ///
    "linagestateres age_bin i.statefip" ///
    "linageraceres age_bin i.race" ///
    "linagechildres age_bin i.children" {
    * first word is the variable suffix, the remainder is the control list
    gettoken suffix controls : spec
    tempvar fitted
    regress g_n_annual `controls' [aw=`aw'], r
    predict `fitted'
    gen g_n_annual_`suffix'=g_n_annual-`fitted' + mean_g_n_annual
    sum g_n_annual_`suffix' [aw=`aw'], d
    drop `fitted'
}

order g_n_annual*


* (2) Now merge to the dataset with loo state shares (loo: presumably leave-one-out -- confirm).
* Only household groups present in both files are kept.
rename statefip fips_state_code
merge 1:m fips_state_code age_bin education race children using "$Section4/age_educ_race_children_loostate_shares", keep(match) nogenerate

* s_nl: share of household group n in the loo spending of cell l,
* where a cell is product module x quality rank x state.
bysort product_module_code quality_rank fips_state_code: egen double spending_l=total(loo_total_spending)
gen double s_nl=loo_total_spending/spending_l

* Build instruments from the raw and residualized shocks. Cells are now
* state x product module x quality rank, for both the growth and the level instruments.
gen double inst_raw = g_n_annual*s_nl
gen double inst_raw_lvl = log_P_initial*s_nl

foreach i in linageres ageres ageeducres ageeducstateres allres ///
educres stateres linageducres linagestateres {
gen double inst_`i' = g_n_annual_`i'*s_nl
}

* These two instruments keep their existing short names (no "res" suffix) so the
* saved dataset is unchanged, but the source shocks are referenced by their full
* names instead of relying on Stata's variable-name abbreviation.
gen double inst_linagerace = g_n_annual_linageraceres*s_nl
gen double inst_linagechild = g_n_annual_linagechildres*s_nl

foreach i in raceres childrenres {
gen double inst_`i' = g_n_annual_`i'*s_nl
}

* Sum the share-weighted shocks (and spending, P_initial) over household groups within each cell.
collapse (sum) inst_* loo_total_spending P_initial, by(fips_state_code product_module_code quality_rank) fast
rename loo_total_spending total_spend_HMS0406

save "$Section4/instrument_age_educ_race_children_loostate", replace
